/* 
    Purpose: Using the 1970 Census, this file takes cleaned variables 
             from 4c and calculates average predicted mother income 
             (i.e., "income scores") at ONLY the occupation x race x south
             level.

    Note: Will create templates/income scores at occ and occ x race levels 
          solely to impute income for missing occ x race x south cells.

    Creates: incomescores_mothers1970_byocc_byr_bys.dta
*/
* Session setup: empty memory, switch off -more- paging, and work from the
* Census data folder so that all relative paths below resolve from there.
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"
 
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

/* Note: The Census microdata from 4c do not contain every one of the 
         28 coarsened occupations, so the cell templates are built from 
         a separate file that lists all occupations. 
*/
    use ./code/OtherCensus_RawData/motheroccej_template_occs.dta, clear 

* Template 1: occupation (the template file exactly as loaded)
    tempfile template_byocc
    save `template_byocc'

* Templates 2 and 3 are built cumulatively inside a single preserve/restore,
* so the occupation-only template is back in memory once they are saved.
preserve 

    * Template 2: occupation x race.
    * -expand- flags the original rows 0 and the copies 1; adding 1 gives race = 1, 2.
    expand 2, generate(race)
    replace race = race + 1
    
    tempfile template_byocc_byr
    save `template_byocc_byr'

    * Template 3: occupation x race x south.
    * Duplicate every occupation x race row once more; south_merge = 0, 1.
    expand 2, generate(south_merge)
    
    tempfile template_byocc_byr_bys
    save `template_byocc_byr_bys'
        
restore 

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

    * Income variables that get averaged within each cell
    global income_measures "inctot faminc HHinc"

    * 1970 mothers extract. -number- is a unit counter: its raw (unweighted)
    * sum in the collapse below is the count of observations in the cell.
    use ./output/Census1970_mothers_ages30to50.dta, clear   
    generate number = 1 

*******************
*** COLLAPSE 
*******************

    * Variables that define a cell at each aggregation level
    local cell_byocc         "motheroccej"
    local cell_byocc_byr     "motheroccej race"
    local cell_byocc_byr_bys "motheroccej race south_merge"

foreach lvl in byocc byocc_byr byocc_byr_bys {

    local cell "`cell_`lvl''"

    * The microdata are preserved so that every level is collapsed from the
    * same person-level file.
    preserve 

        * perwt-weighted mean of each income measure, unweighted obs count
        collapse (rawsum) number (mean) $income_measures [aw=perwt], by(`cell') 

        * Label each average, then give it its level-specific name
        foreach inc of global income_measures {
            label var `inc' "Coarse (mother) income score, average, 1970 using `inc'"
            rename `inc' avg_`inc'_1970_`lvl'
        }

        tempfile incomescores
        save `incomescores'

        * Attach the scores to the full template so that cells with no
        * observations in the microdata still appear (with missing averages).
        use `template_`lvl''
        merge 1:1 `cell' using `incomescores', nogenerate

        * Template-only cells have no count after the merge: record zero obs
        replace number = 0 if missing(number)
        rename number number_1970obs_`lvl'
        label var number_1970obs_`lvl' "Number of obs in cell"

        tempfile incomescores_`lvl'
        save `incomescores_`lvl''

    restore 

}

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
*** IMPUTATIONS
****************

/* Note: Any occ x race x south cell with a missing average is imputed from 
         the next less granular level: occ x race first, which is itself 
         filled from the occ-level average when missing. */

    * Aggregation levels, ordered from coarsest (1) to finest (3)
    local name1 "byocc"
    local name2 "byocc_byr"
    local name3 "byocc_byr_bys"
        
* Start from the finest file (the one with the most cells) and attach the
* coarser scores to every row, so all three levels sit side by side.
    use `incomescores_`name3'', clear
    merge m:1 motheroccej race using `incomescores_`name2'', nogenerate
    merge m:1 motheroccej using `incomescores_`name1'', nogenerate

* Cascade the imputation from coarse to fine. Level 2 (occ x race) is filled
* from level 1 (occ) first, so that level 3 (occ x race x south) is then
* filled from an already-imputed level 2.
    forvalues k = 2/3 {

        local j = `k' - 1
        local fine   "`name`k''"
        local coarse "`name`j''"

        foreach inc of global income_measures {
            * Report how many rows need imputing, then fill them
            count if missing(avg_`inc'_1970_`fine')
            replace avg_`inc'_1970_`fine' = avg_`inc'_1970_`coarse' if missing(avg_`inc'_1970_`fine')
        }
    }

***********
* SAVE
***********

    /* Build and save the final occ x race x south income-score file.
       Steps: keep one row per cell, keep only the variables of that level,
       apply the last-resort clergy imputation, check that no HHinc score
       is missing, then save. The merged data in memory are preserved and
       restored around the whole step.                                     */
    foreach x in byocc_byr_bys {
    
        if "`x'"=="byocc_byr_bys" local cell "motheroccej race south_merge"

        preserve 
        
        * One row per cell; keep the cell identifiers, the obs count and the
        * averages belonging to this level only.
        bysort `cell': keep if _n==1
        keep number_1970obs_`x' `cell' avg*`x'
        
        * Show which occupations are still missing after the cascade
        tab motheroccej if avg_HHinc_1970_`x'==.

    ****************************************************    
    /* Last step: impute occ x race x south income 
                  when average income is missing 
                  at all levels                       */
    **************************************************** 
        /* Give clergy (occ=2) the average income of 
           social workers (occ=9) in the same race x region.

           Only clergy cells that are still missing are filled, so an
           observed or previously imputed clergy score is never overwritten.
           The donor value is held in a temporary scalar rather than being
           pasted into the command as macro text: this keeps the value at
           full precision and keeps the replace command well formed even
           when the donor cell has no usable value.                        */    
        tempname donor_mean
        foreach y in 2 {
            foreach race in 1 2 {
                foreach reg in 0 1 {               
                    foreach c in $income_measures {

                        sum avg_`c'_1970_`x' if motheroccej==9 & race==`race' & south_merge==`reg'
                        local donor_n = r(N)
                        scalar `donor_mean' = r(mean)

                        * Warn (without stopping) when there is nothing to
                        * donate; the clergy cell then stays missing and the
                        * assert below flags it for HHinc.
                        if `donor_n'==0 {
                            display as error "No donor value (occ 9) for avg_`c'_1970_`x', race=`race', south_merge=`reg'"
                        }

                        replace avg_`c'_1970_`x' = `donor_mean' if motheroccej==`y' & race==`race' & south_merge==`reg' & missing(avg_`c'_1970_`x')
                    }               
                }
            }
        }

        * Every cell must now carry a household-income score
        assert avg_HHinc_1970_`x'!=.

        
        compress
        save ./output/incomescores_mothers1970_`x'.dta, replace
        
        restore
    
    }
